import re


def match_order_id(txt):
    """Extract the order id carried in the ``param`` query parameter.

    Example input:
    http://120.35.30.176/shopping/contractitem.htm?param=2c9be5956ee9fbb0016eed79b3b014ba

    :param txt: text (typically a URL) to search; may be empty or ``None``.
    :return: the word characters following the first ``param=``, or ``''``
        when ``txt`` is falsy or contains no such parameter.
    """
    if not txt:
        return ''

    found = re.search(r'param=(\w+)', txt.strip())
    return found.group(1) if found else ''


def match_cat_id(link):
    """Parse the category code out of a link.

    Example input:
    http://cz.fjzfcg.gov.cn/shopping/storegoods_list.htm?gc_id=2c9be595623dc155016251bf76125b21

    :param link: URL text to search; may be empty or ``None``.
    :return: the word characters following the first ``gc_id=``, or ``''``
        when ``link`` is falsy or contains no such parameter.
    """
    if not link:
        return ''

    found = re.search(r'gc_id=(\w+)', link.strip())
    if found is None:
        return ''
    # ``\w+`` can never capture whitespace, so the captured text is returned as is.
    return found.group(1)


def match_sku_id(link):
    """Parse the product (SKU) code out of a link.

    Two URL shapes are tried, in this order:

    * query form: http://120.35.30.176/shopping/goods.htm?id=2c9be59569fb1f8d0169fc7d0a5f7e1d
    * path form:  http://120.35.30.176/shopping/goods_2c9be59569fb1f8d0169fc7d0a5f7e1d.htm

    NOTE(review): the query pattern is the bare text ``id=``, so it also
    matches parameters whose name merely ends in ``id`` (for example
    ``/commodities/220403?p_id=6641&target=_blank`` yields ``'6641'``).
    Confirm with callers whether that is intended.

    :param link: URL text to search; may be empty or ``None``.
    :return: the captured code from the first pattern that matches, or ``''``
        when ``link`` is falsy or neither pattern matches.
    """
    if not link:
        return ''

    cleaned = link.strip()
    # Order matters: the query form takes precedence over the path form.
    for pattern in (r'id=(\w+)', r'goods_(\w+)\.htm'):
        found = re.search(pattern, cleaned)
        if found:
            return found.group(1)

    return ''


def match_supplier_id(link):
    """Parse the supplier code out of a store link.

    Example inputs:
    http://120.35.30.176/shopping/store_2c9be58c5a16f615015a269a445c0197.htm
    http://120.35.30.176/shopping/store_2c9be58c5a2725d1015a2c2c5b1a0012.htm

    This function used to be defined twice with identical bodies; the second
    definition silently replaced the first, so a single definition is kept.

    :param link: URL text to search; may be empty or ``None``.
    :return: the word characters between ``store_`` and ``.htm`` for the
        first match, or ``''`` when ``link`` is falsy or nothing matches.
    """
    if not link:
        return ''

    found = re.search(r'store_(\w+)\.htm', link.strip())
    return found.group(1) if found else ''


def match_order_pages(link):
    """Extract the total page count from a pagination handler string.

    Example input:
    return ajaxPage("http://120.35.30.176/shopping/newgoodsorder.htm?1=1",5,this)

    :param link: text to search; may be empty or ``None``.
    :return: the digits found between a comma and ``,this`` (surrounding
        whitespace ignored) as a string, or ``'1'`` when ``link`` is falsy
        or nothing matches.
    """
    default_pages = '1'
    if not link:
        return default_pages

    found = re.search(r',\s*(\d+)\s*,this', link.strip())
    if found is None:
        return default_pages
    return found.group(1)


def match_spu_id(link):
    """Parse the SPU code out of a goods link.

    Two URL shapes are tried, in this order:

    * query form: http://120.35.30.176/shopping/goods.htm?id=2c9be595686e27a301687e82cbfd505f
    * path form:  http://120.35.30.176/shopping/goods_2c9be595686e27a301687e82cbfd505f.htm

    :param link: URL text to search; may be empty or ``None``.
    :return: the captured code from the first pattern that matches, or ``''``
        when ``link`` is falsy or neither pattern matches.
    """
    if not link:
        return ''

    cleaned = link.strip()
    # The dot in ``goods.htm`` is escaped so it only matches a literal '.',
    # not an arbitrary character (e.g. ``goodsXhtm?id=...``).
    for pattern in (r'goods\.htm\?id=(\w+)', r'goods_(\w+)\.htm'):
        found = re.search(pattern, cleaned)
        if found:
            return found.group(1)

    return ''


# Manual test entry point: currently a no-op. Uncomment the call below to
# try one of the parsers by hand when running this file as a script.
if __name__ == '__main__':
    # print(match_order_pages('return ajaxPage("http://120.35.30.176/shopping/newgoodsorder.htm?1=1",5,this)'))
    pass
